#Author: Wojciech Gedek
#Article title: Universal targeting in local public health: a comparative study of Polish cities

##### Data Availability ##### 
# This work is based on two main data sources: 
# 1) Data supplied by Statistics Poland (the Local Data Bank database). 
# These data are permanently accessible at: 
# https://bdl.stat.gov.pl/bdl/start 
# 2) Data supplied by Poland's Ministry of Health (BASiW database), 
# which are permanently accessible at:
#https://basiw.mz.gov.pl/mapy-informacje/mapa-2022-2026/analizy/czynniki-ryzyka-i-profilaktyka/ 
# [available in Polish only]

# Users seeking to replicate this work should use the files uploaded 
# to the JPP Dataverse that correspond to this script (rawdata.csv, calibratedQCAtest.csv).    
# Alternatively, they may download and merge the relevant files directly 
# following the information provided in appendices to the article 
# and using the data sources cited above.   


##### Sources ##### 
# The procedure outlined below is based primarily on the following works: 
# 1) Oana, Ioana-Elena, and Carsten Q. Schneider. 2021. 
# ‘A Robustness Test Protocol for Applied QCA: Theory and R Software Application’. 
# Sociological Methods & Research: 1–32. 
#2) Oana, Ioana-Elena, Carsten Q. Schneider, and Eva Thomann. 2021. 
# Qualitative Comparative Analysis Using R: A Beginner’s Guide. Cambridge University Press.


#####Preparation#####
rm(list = ls())

library(psych); 
library(QCA); 
library(SetMethods);
library(stargazer)

# Setting working directory
setwd("C:/Users/wojge/OneDrive/Pulpit/Test")
getwd()

# Raw data upload:
mydata <- read.csv2("rawdata.csv", row.names=1, header = TRUE)
View(mydata)

##### Set calibration #####

  ### Outcome (UTI) - fuzzy-set index construction ###  
colnames(mydata)

# Index components: 
# (1) PHP_EXP - High per capita expenditure on public health programmes in 2019 in Polish Zloty
summary(mydata$php_exp)
mydata$PHP_EXP <- calibrate(mydata$php_exp, type = "fuzzy", thresholds = "e=10, c=19, i=60")
skew.check(mydata$PHP_EXP)

#(1' - alternative operationalisation) PHP_EXP_ALT - High per capita spending on universally targeted public health interventions in 2019 in Polish Zloty
summary(mydata$php_exp_uni)
mydata$PHP_EXP_ALT <- calibrate(mydata$php_exp_uni, type = "fuzzy", thresholds = "e=2, c=7.9, i= 10")
skew.check(mydata$PHP_EXP_ALT)

#(2) UNI_COV_A - The proportion of participants in programmes belonging to the 'universal prevention’ category 
#(a legal designation used for statistical purposes in the National Health Programme) in 2019. 
summary(mydata$uni_cov_a)
mydata$UNI_COV_A <- calibrate(mydata$uni_cov_a, type = "fuzzy", thresholds = "e=15, c=50, i=80")
skew.check(mydata$UNI_COV_A)

#(3) UNI_COV_B - The proportion of participants in programmes that addressed more than 1% of the city's population
summary(mydata$uni_cov_b)
mydata$UNI_COV_B <- calibrate(mydata$uni_cov_b, type = "fuzzy", thresholds = "e=50, c=70, i=90")
skew.check(mydata$UNI_COV_B)

# Final fsUTI composition:
mydata$U1 <- compute("PHP_EXP*UNI_COV_A", data=mydata)
mydata$U2 <- compute("PHP_EXP*UNI_COV_B", data=mydata)
mydata$OUT <- compute("U1 + U2", data=mydata)
skew.check(mydata$OUT) 

  ### Conditions ###

# High population in thousands in 2019
summary(mydata$pop)
mydata$POP <- calibrate(mydata$pop, type = "fuzzy", thresholds = "e=150, c=300, i= 400")
skew.check(mydata$POP)

# High median age of the population in 2019  
summary(mydata$agemed)
mydata$AGEMED <- calibrate(mydata$agemed, type = "fuzzy", thresholds = "e=41.5, c=42.7, i= 44.1")
skew.check(mydata$AGEMED)

# High average death rate calculated for 1000 inhabitants between 2016 and 2019
summary(mydata$death)
mydata$DEATH <- calibrate(mydata$death, type = "fuzzy", thresholds = "e=9, c=10.8, i=12.5")
skew.check(mydata$DEATH)

# High age dependency ratio calculated as an average between 2018 and 2019 
summary(mydata$dependency_ratio)
mydata$DEPENDENCY_RATIO <- calibrate(mydata$dependency_ratio, type = "fuzzy", thresholds = "e=29, c=31.8, i=34")
skew.check(mydata$DEPENDENCY_RATIO)

# High average per capita municipal ‘own income’ between 2017 and 2019 in Polish zloty
summary(mydata$affl)
mydata$AFFL <- calibrate(mydata$affl, type = "fuzzy", thresholds = "e=3000, c=3700, i= 4000")
skew.check(mydata$AFFL)

# Substantial average representation of women on a city council in 2019. 
summary(mydata$wmr)
mydata$WMR <- calibrate(mydata$wmr, type = "fuzzy", thresholds = "e=25, c=30, i=40")
skew.check(mydata$WMR)

# High average number of participants in local civic organisations (clubs, sections) per 1000 inhabitants between 2017 and 2019
summary(mydata$particip)
mydata$PARTICIP <- calibrate(mydata$particip, type = "fuzzy", thresholds = "e=5, c=8, i=15")
skew.check(mydata$PARTICIP)

# High average turnout at two consecutive local elections in 2014 and 2018
summary(mydata$turnout)
mydata$TURNOUT <- calibrate(mydata$turnout, type = "fuzzy", thresholds = "e=43, c=48, i=50")
skew.check(mydata$TURNOUT)

### Higher-order sets ### 
# SOCAP/CIVSOC - High social capital disjunction: 
mydata$SOCAP <- compute("PARTICIP + TURNOUT", data=mydata)
skew.check(mydata$SOCAP)
# POORHEALTH - Poor population health conjuction (main version): 
mydata$POORHEALTH <- compute("AGEMED * DEATH", data=mydata)
skew.check(mydata$POORHEALTH)
# POORHEALTH_ALT - Poor population health conjuction (alternative)
mydata$POORHEALTH_ALT <- compute("DEPENDENCY_RATIO * DEATH", data=mydata)
skew.check(mydata$POORHEALTH_ALT)


#### Saving calibration ####
calibrated <- mydata[, c("OUT", "POP","AFFL", "AGEMED","DEATH", "TURNOUT",
"PARTICIP", "WMR", "DEPENDENCY_RATIO", "SOCAP", "POORHEALTH", "POORHEALTH_ALT")]
View(calibrated)                   
write.csv2(calibrated, file = "calibratedQCAtest.csv")


##### Necessity Analysis #####
# Necessary conditions 
mydata <- read.csv2("calibratedQCAtest.csv", row.names=1, header = TRUE)
View(mydata)
skew.check(mydata$OUT)
nconds <- mydata[, c("POP", "POORHEALTH", "AFFL", "WMR", "SOCAP")]
# FOR OUT
QCAfit(nconds, mydata$OUT, necessity = TRUE,neg.out = FALSE) 

# FOR ~OUT
QCAfit(nconds, mydata$OUT, necessity = TRUE,neg.out = TRUE)

#SUIN conditions

NC <- superSubset(mydata, outcome = "OUT", 
                  conditions = "POP, POORHEALTH, AFFL, WMR, SOCAP",
                  incl.cut = 0.9, cov.cut = 0.6, ron.cut=0.5)
NC #NULL

NNC <- superSubset(mydata, outcome = "~OUT", 
                   conditions = "POP, POORHEALTH, AFFL, WMR, SOCAP", 
                   incl.cut = 0.9, cov.cut = 0.6, ron.cut=0.5)
NNC # POORHEALTH + ~WMR



##### Sufficiency analysis #####
mydata <- read.csv2("calibratedQCAtest.csv", row.names=1, header = TRUE)
View(mydata)
ttOUT <- truthTable(data=mydata, outcome = "OUT", 	
                    conditions = "POP, POORHEALTH, AFFL, WMR, SOCAP", 
                    sort.by="incl, n", incl.cut = 0.8, show.cases=TRUE, complete=TRUE, dcc=TRUE)

ttOUT


## For OUT (presence of the outcome) ###
# Conservative result  
cout <- minimize(ttOUT, details=TRUE, show.cases=TRUE)
cout

# Parsimonious result 
psout <- minimize(ttOUT, include = "?", details = TRUE, row.dom=FALSE,
                  show.cases = TRUE)
psout

# Intermediate result
isout <- minimize(ttOUT, include = "?", details = TRUE, 
                  show.cases = TRUE, dir.exp = "POP, AFFL, WMR, SOCAP")

isout 

#### For ~OUT (absence of the outcome)
ttOUT2 <- truthTable(data=mydata, outcome = "~OUT", 	
                     conditions = "POP, POORHEALTH, AFFL, WMR, SOCAP", 
                     sort.by="incl, n", incl.cut = 0.8, show.cases=TRUE, complete=TRUE, dcc=TRUE)

ttOUT2
# Cons. result 
cout2 <- minimize(ttOUT2, details=TRUE, show.cases=TRUE)
cout2
# Pars. result
psout2 <- minimize(ttOUT2, include = "?", details = TRUE, row.dom=FALSE,
                   show.cases = TRUE)
psout2

# Interm. result
isout2 <- minimize(ttOUT2, include = "?", details = TRUE, 
                   show.cases = TRUE, dir.exp = "~POP, ~AFFL, ~WMR, ~SOCAP")

isout2

### Exporting the truth table ###
write.csv2(ttOUT$tt, "ttOUT.csv")
write.csv2(ttOUT2$tt, "ttOUT2.csv")


### Exporting the solutions to HTML files ###

stargazerSol(results = isout, outcome = "OUT",
             type = "html", show.cases=TRUE, out = "OUTCOME.html")

stargazerSol(results = isout2, outcome = "~OUT",
             type = "html", show.cases=TRUE, out = "OUTCOME2.html")

##### Robustness tests #####

### Preparation ### 
conds <- c("POP", "POORHEALTH", "AFFL", "WMR", "SOCAP")
rawdata <- read.csv2("rawdata.csv", row.names=1, header = TRUE)
View(rawdata)

### Sensitivity ranges ###  
rob.inclrange(data = mydata, step = 0.01, max.runs = 20, outcome = "OUT", conditions = conds, incl.cut = 0.8, n.cut = 2, include = "?")
rob.inclrange(data = mydata, step = 0.01, max.runs = 20, outcome = "~OUT", conditions = conds, incl.cut = 0.8, n.cut = 2, include = "?") 
# OUT: Lower bound 0.74, Upper bound 0.93 
# ~OUT: Lower bound 0.74, Upper Bound 0.88

#Higher inclusion criteria results
ttOUTtest1 <- truthTable(data=mydata, outcome = "OUT", 	
                         conditions = "POP, POORHEALTH, AFFL, WMR, SOCAP", 
                         sort.by="incl, n", incl.cut = 0.85, show.cases=TRUE, complete=TRUE, dcc=TRUE) # complete = False [tabela bez logical reminders]; 

ttOUTtest1

isout_test1 <- minimize(ttOUTtest1, include = "?", details = TRUE, 
                        show.cases = TRUE, dir.exp = "POP, POORHEALTH, AFFL, WMR, SOCAP")

isout_test1 

ttOUTtest2 <- truthTable(data=mydata, outcome = "~OUT", 	
                         conditions = "POP, POORHEALTH, AFFL, WMR, SOCAP", 
                         sort.by="incl, n", incl.cut = 0.85, show.cases=TRUE, complete=TRUE, dcc=TRUE)

ttOUTtest2

isout_test2 <- minimize(ttOUTtest2, include = "?", details = TRUE, 
                        show.cases = TRUE, dir.exp = "~POP, ~AFFL, ~WMR, ~SOCAP")

isout_test2

### Alternative QCA solutions ###

### Re-calibration of conditions ### 
# POP
summary(rawdata$pop)
mydata$POP_test <- calibrate(rawdata$pop, type = "fuzzy", thresholds = "e=180, c=390, i= 600")
skew.check(mydata$POP_test) 

# AGEMED
summary(rawdata$agemed)
mydata$AGEMED_test <- calibrate(rawdata$agemed, type = "fuzzy", thresholds = "e=41.5, c=43.4, i= 44.7")
skew.check(mydata$AGEMED_test) 

# DEATH
summary(rawdata$death)
mydata$DEATH_test <- calibrate(rawdata$death, type = "fuzzy", thresholds = "e=10, c=11.3, i=13")
skew.check(mydata$DEATH_test)

# AFFL
summary(rawdata$affl)
mydata$AFFL_test <- calibrate(rawdata$affl, type = "fuzzy", thresholds = "e=3000, c=3900, i= 4600")
skew.check(mydata$AFFL_test) 

# PARTICIP
summary(rawdata$particip)
mydata$PARTICIP_test <- calibrate(rawdata$particip, type = "fuzzy", thresholds = "e=4, c=10, i=18")
skew.check(mydata$PARTICIP_test)

# WMR
summary(rawdata$wmr)
mydata$WMR <- calibrate(rawdata$wmr, type = "fuzzy", thresholds = "e=25, c=30, i=40")
skew.check(mydata$WMR)
# TURNOUT 
summary(rawdata$turnout)
mydata$TURNOUT_test <- calibrate(rawdata$turnout, type = "fuzzy", thresholds = "e=45, c=50, i=52")
skew.check(mydata$TURNOUT_test) 

# POORHEALTH #1
mydata$POORHEALTH_test <- compute("AGEMED_test * DEATH_test", data=mydata)
skew.check(mydata$POORHEALTH_test)

# POORHEALTH #2
mydata$POORHEALTH_test2 <- mydata$POORHEALTH_ALT

# SOCAP
mydata$SOCAP_test <- compute("PARTICIP_test + TURNOUT_test", data=mydata)
skew.check(mydata$SOCAP_test)

### Alternative tests for OUT ###
ttOUTtest3 <- truthTable(data=mydata, outcome = "OUT", 	
                         conditions = "POP_test, POORHEALTH_test, AFFL_test, WMR, SOCAP_test", 
                         sort.by="incl, n", incl.cut = 0.8, show.cases=TRUE, complete=TRUE, dcc=TRUE)
ttOUTtest3


isout_test3 <- minimize(ttOUTtest3, include = "?", details = TRUE, 
                        show.cases = TRUE, dir.exp = "POP_test, AFFL_test, WMR, SOCAP_test")

isout_test3 

  # Comparisons
RF <- rob.fit(test_sol=isout_test3,initial_sol = isout,outcome = "OUT") 
RF 

rob.xyplot(test_sol = isout_test3, initial_sol = isout, outcome = "OUT", all_labels = TRUE, fontsize = 5 , jitter=TRUE, area_lab=TRUE)


### Alternative tests for ~OUT ###


ttOUTtest4 <- truthTable(data=mydata, outcome = "~OUT", 	
                         conditions = "POP_test, POORHEALTH_test, AFFL_test, WMR, SOCAP_test", 
                         sort.by="incl, n", incl.cut = 0.8, show.cases=TRUE, complete=TRUE, dcc=TRUE) # complete = False [tabela bez logical reminders]; 

ttOUTtest4 

isout_test4 <- minimize(ttOUTtest4, include = "?", details = TRUE, 
                        show.cases = TRUE, dir.exp = "~POP_test, ~AFFL_test, ~WMR, ~SOCAP_test")

isout_test4 


  # Comparisons for ~OUT 

RF <- rob.fit(test_sol= isout_test4, initial_sol = isout2, outcome = "~OUT") 
RF 

rob.xyplot(test_sol = isout_test4, initial_sol = isout2, outcome = "~OUT", all_labels = TRUE, fontsize = 5 , jitter=TRUE, area_lab=TRUE)


### Alternative tests for POORHEALTH_test2 (alternative operationalisation) ###
#For OUT
ttOUTtest5 <- truthTable(data=mydata, outcome = "OUT", 	
                         conditions = "POP_test, POORHEALTH_test2, AFFL_test, WMR, SOCAP_test", 
                         sort.by="incl, n", incl.cut = 0.8, show.cases=TRUE, complete=TRUE, dcc=TRUE)

ttOUTtest5

isout_test5 <- minimize(ttOUTtest5, include = "?", details = TRUE, 
                        show.cases = TRUE, dir.exp = "POP_test, AFFL_test, WMR, SOCAP_test")

isout_test5 

RF <- rob.fit(test_sol= isout_test5, initial_sol = isout, outcome = "OUT") 
RF 
rob.xyplot(test_sol = isout_test5, initial_sol = isout, outcome = "OUT", all_labels = TRUE, fontsize = 5 , jitter=TRUE, area_lab=TRUE)

#For ~OUT
ttOUTtest6 <- truthTable(data=mydata, outcome = "~OUT", 	
                         conditions = "POP_test, POORHEALTH_test2, AFFL_test, WMR, SOCAP_test", 
                         sort.by="incl, n", incl.cut = 0.8, show.cases=TRUE, complete=TRUE, dcc=TRUE) 

ttOUTtest6
isout_test6 <- minimize(ttOUTtest6, include = "?", details = TRUE, 
                        show.cases = TRUE, dir.exp = "POP_test, AFFL_test, WMR, SOCAP_test")

isout_test6 

RF <- rob.fit(test_sol= isout_test6, initial_sol = isout2, outcome = "~OUT") 
RF 
rob.xyplot(test_sol = isout_test6, initial_sol = isout2, outcome = "~OUT", all_labels = TRUE, fontsize = 5 , jitter=TRUE, area_lab=TRUE)
